# REPLICATION
# Place-Based Campaigning: The Political Impact of Real Grassroots Mobilization
# Daniel Bischof and Thomas Kurer
# Journal of Politics

# Figures produced in R
  # Descriptives of Events over time presented in Figure 1
  # Text Analysis presented in Figure 3, Figure A.1 and Table A.2

# Session-wide ggplot2 default: black-and-white theme with 14pt text.
# The Figure 1 plots below add their own theme_bw() + theme(...) on top.
theme_set(
  theme_bw() +
    theme(text = element_text(size = 14))
)


# Colour palette matching the Stata scheme, stored as hex strings.
# The trailing comment gives the 0-255 RGB triple each value was derived from.

DB_blue      <- "#2056A5"  # rgb( 32,  86, 165)
DB_bluelight <- "#4FBEFF"  # rgb( 79, 190, 255)
DB_gold      <- "#B36F14"  # rgb(179, 111,  20)
DB_greenish  <- "#B3B02E"  # rgb(179, 176,  46)
DB_grey      <- "#6D8999"  # rgb(109, 137, 153)
DB_orange    <- "#DFA95A"  # rgb(223, 169,  90)
DB_red       <- "#FF696E"  # rgb(255, 105, 110)
DB_red2      <- "#CC2B31"  # rgb(204,  43,  49)
DB_redlight  <- "#FF9E8F"  # rgb(255, 158, 143)
DB_turqoise  <- "#4FFFE1"  # rgb( 79, 255, 225)

# Descriptives (Figure 1)

# Load the Meetup event export. `event_description` is used as the text
# field; the other CSV columns are accessed below as ordinary columns.
m5s_data <- readtext("./../../data_original/M5S/0_events_20181206.csv", text_field = "event_description")

# Year, month and day are cut out of `time` by character position
# (1-4, 6-7 and 9-10 respectively).
m5s_data$event_year <- as.numeric(substr(m5s_data$time, 1, 4))
# Some events have no usable `time`: fall back to the year of `event_created`.
m5s_data$event_year_sub <- as.numeric(substr(m5s_data$event_created, 1, 4))
m5s_data$event_year <- ifelse(is.na(m5s_data$event_year), m5s_data$event_year_sub, m5s_data$event_year)
m5s_data$event_month <- as.numeric(substr(m5s_data$time, 6, 7))
m5s_data$event_day <- as.numeric(substr(m5s_data$time, 9, 10))
# Only the year has a fallback, so rows without `time` produce strings such as
# "2015-NA-NA". The explicit format turns those into NA dates. Without it,
# as.Date() guesses the format from the first non-NA string and aborts with
# "character string is not in a standard unambiguous format" whenever that
# string happens to be one of the malformed ones.
m5s_data$event_date <- as.Date(
  paste(m5s_data$event_year, m5s_data$event_month, m5s_data$event_day, sep = "-"),
  format = "%Y-%m-%d"
)

# Cumulative number of groups over time
#
# One row per Meetup group (the first record of each group id), with short
# column names, ordered by creation date. `nrgroups` is the running count of
# groups created up to and including that row.
groups <- m5s_data %>%
  dplyr::select(
    id = group_id,
    date = group_created,
    name = group_name,
    loc = group_location,
    url = group_url
  ) %>%
  group_by(id) %>%
  dplyr::filter(row_number(id) == 1) %>%
  ungroup() %>%
  # keep only the leading YYYY-MM-DD part of the creation timestamp
  dplyr::mutate(date = as.Date(substr(date, 1, 10)), one = 1) %>%
  arrange(date) %>%
  mutate(nrgroups = cumsum(one))

# Number of events per calendar month

# Events without a parsable date are dropped; the rest are counted per month,
# with `date` holding the first day of that month.
monthly <- m5s_data %>%
  dplyr::filter(!is.na(event_date)) %>%
  dplyr::mutate(one = 1) %>%
  group_by(date = floor_date(event_date, "month")) %>%
  dplyr::summarise(nr = sum(one))

# Year and month as character columns ("2013", "02")
monthly$year <- format(monthly$date, "%Y")
monthly$month <- format(monthly$date, "%m")

# Plots ----

# CUMULATIVE NR OF GROUPS (Figure 1a)
#
# Event markers are drawn with annotate("segment", ...) instead of
# geom_segment(aes(...)). Constants placed inside aes() are evaluated once per
# row of `groups`, so every marker was drawn as many times as there are groups
# (bloated PDF, and a warning in recent ggplot2 releases). annotate() draws
# each marker exactly once at the same coordinates.
# Each marker runs from (x, y) to (xend, yend); the text label sits just above
# the (xend, yend) end.

ggplot(groups, aes(x = date, y = nrgroups)) +
  # EVENTS
  # Grillo suggests use of Meetup
  annotate("segment",
           x = as.Date('16/07/2005', format="%d/%m/%Y"), y = 0,
           xend = as.Date('16/04/2005', format="%d/%m/%Y"), yend = 150,
           linetype = "dashed", colour = "black") +
  annotate("text", x = as.Date('16/05/2005', format="%d/%m/%Y"), y = 180,
           label = "Grillo suggests \n use of Meetup") +
  # V-Day
  annotate("segment",
           x = as.Date('08/09/2007', format="%d/%m/%Y"), y = 80,
           xend = as.Date('08/05/2007', format="%d/%m/%Y"), yend = 300,
           linetype = "dashed", colour = "black") +
  annotate("text", x = as.Date('08/05/2007', format="%d/%m/%Y"), y = 320,
           label = "'V-Day'") +
  # Referendum
  annotate("segment",
           x = as.Date('04/12/2016', format="%d/%m/%Y"), y = 900,
           xend = as.Date('04/09/2016', format="%d/%m/%Y"), yend = 975,
           linetype = "dotted", colour = "black") +
  annotate("text", x = as.Date('04/09/2016', format="%d/%m/%Y"), y = 985,
           label = "Referendum") +
  # General Election
  annotate("segment",
           x = as.Date('24/02/2013', format="%d/%m/%Y"), y = 400,
           xend = as.Date('24/11/2012', format="%d/%m/%Y"), yend = 555,
           linetype = "dotted", colour = "black") +
  annotate("text", x = as.Date('24/11/2012', format="%d/%m/%Y"), y = 575,
           label = "General Election") +
  # Local Elections 2012
  annotate("segment",
           x = as.Date('06/05/2012', format="%d/%m/%Y"), y = 180,
           xend = as.Date('06/01/2012', format="%d/%m/%Y"), yend = 355,
           linetype = "dotted", colour = "black") +
  annotate("text", x = as.Date('06/01/2012', format="%d/%m/%Y"), y = 375,
           label = "Local Elections") +

  # NR OF GROUPS IN FRONT (added last so the line is drawn over the markers)
  geom_line() +

  # SCALES, LABEL, THEME
  scale_x_date(date_breaks = "6 month",
               limits = as.Date(c('01/01/2004', '01/01/2019'), format="%d/%m/%Y"),
               expand = c(0, 0),
               date_labels = "%b-%Y") +
  xlab("") + ylab("") +
  theme_bw() +
  theme(text = element_text(size = 20),
        axis.text.x = element_text(angle = 45, hjust = 1))

# ggsave() without `plot =` writes the last plot created above
ggsave("./../../results/figures/fig1a_cum_nr_groups.pdf", height=5, width=8.5)

# MONTHLY NUMBER OF EVENTS (Figure 1b)
#
# Raw monthly counts are shown faintly (points + line) underneath a loess
# smoother. The referendum marker is drawn with annotate("segment", ...)
# rather than geom_segment(aes(...)): constants inside aes() are evaluated per
# row of `monthly`, which drew the same vertical line once for every month.

ggplot(monthly, aes(x = date, y = nr)) +
  geom_point(alpha = 0.2) +
  geom_line(alpha = 0.2) +
  geom_smooth(color = "black", method = "loess") +
  # EVENTS
  # Referendum: vertical dotted line at the referendum date, label on top
  annotate("segment",
           x = as.Date('04/12/2016', format="%d/%m/%Y"), y = 0,
           xend = as.Date('04/12/2016', format="%d/%m/%Y"), yend = 4200,
           linetype = "dotted", colour = "gray") +
  annotate("text", x = as.Date('04/12/2016', format="%d/%m/%Y"), y = 4300,
           label = "Referendum") +
  # SCALES ETC
  scale_x_date(date_breaks = "6 month",
               limits = as.Date(c('01/01/2004', '01/01/2019'), format="%d/%m/%Y"),
               expand = c(0, 0),
               date_labels = "%b-%Y") +
  xlab("") + ylab("") +
  theme_bw() +
  theme(text = element_text(size = 20),
        axis.text.x = element_text(angle = 45, hjust = 1))

# ggsave() without `plot =` writes the last plot created above
ggsave("./../../results/figures/fig1b_nr_events_month.pdf", height=5, width=8.5)

# Text Analysis (Figure 3, Figure A.1, Table A.2) ----

# Load the event export again; `event_description` is the text field and is
# available as the `text` column.
textanalysis <- readtext("./../../data_original/M5S/0_events_20181206.csv", text_field = "event_description")

# Basic cleaning
textanalysis$text <- tolower(textanalysis$text)
textanalysis$text <- stri_replace_all(textanalysis$text, " ", regex = "<.*?>")   # replace HTML tags with a space
textanalysis$text <- gsub("l'", " ", textanalysis$text) # replace the elided article "l'" with a space
textanalysis$text <- stri_trans_general(textanalysis$text, "Latin-ASCII") # strip accents: già -> gia

# Year, month and day are cut out of `time` by character position.
textanalysis$event_year <- as.numeric(substr(textanalysis$time, 1, 4))
# Some events have no usable `time`: fall back to the year of `event_created`.
textanalysis$event_year_sub <- as.numeric(substr(textanalysis$event_created, 1, 4))
textanalysis$event_year <- ifelse(is.na(textanalysis$event_year), textanalysis$event_year_sub, textanalysis$event_year)
textanalysis$event_month <- as.numeric(substr(textanalysis$time, 6, 7))
textanalysis$event_day <- as.numeric(substr(textanalysis$time, 9, 10))
# Only the year has a fallback, so rows without `time` produce strings such as
# "2015-NA-NA". The explicit format maps those to NA. Without it, as.Date()
# guesses the format from the first non-NA string and stops with an error if
# that string is malformed.
textanalysis$event_date <- as.Date(
  paste(textanalysis$event_year, textanalysis$event_month, textanalysis$event_day, sep = "-"),
  format = "%Y-%m-%d"
)

# Documents without any year cannot be grouped by year later on
textanalysis <- textanalysis %>% dplyr::filter(!is.na(event_year))

# Corpus of the cleaned event descriptions
corp <- corpus(textanalysis)
summary(corp)

# Quick look at the first document.
# NOTE(review): texts() appears to be deprecated in recent quanteda releases
# in favour of as.character() - confirm against the quanteda version used.
texts(corp)[1]

# Tokenisation in three passes:
#   1. split into tokens, dropping punctuation, URLs and symbols (numbers are
#      kept for now because of names such as "movimento 5 stelle");
#   2. join the multi-word names below into single compound tokens;
#   3. only then drop the remaining number tokens.
toks <- corp %>%
  tokens(remove_punct = TRUE, remove_url = TRUE, remove_symbols = TRUE) %>%
  tokens_compound(
    pattern = phrase(c(
      "beppe grillo",
      "movimento 5 stelle", "5 stelle", "movimento cinque stelle", "cinque stelle",
      "raccolta firme", "raccolta di firme"
    ))
  ) %>%
  tokens(remove_numbers = TRUE)

# Italian stopword list, transliterated to ASCII in the same way as the text
# itself (già -> gia).
stopwords_it <- stri_trans_general(stopwords("it"), "Latin-ASCII")

# Stopword list extended with corpus-specific noise terms: filler words,
# HTML/URL residue, English boilerplate, month and weekday names,
# meeting-agenda vocabulary and other generic terms.
stopwords_broad <- c(
  stopwords_it,
  "quindi", "c'è", "c'e", "de", "ogni", "poi", "ancora", "può", "puo", "qui",
  "solo", "link", "gt", "pdf", "and", "amp", "for", "more", "information",
  "see", "n",
  "gennaio", "luglio", "febbraio", "agosto", "marzo", "settembre",
  "aprile", "ottobre", "maggio", "novembre", "giugno", "dicembre",
  "varie", "eventuali", "odg", "o.d.g.", "o.d.g", "ore", "via", "masked",
  "giorno", "meetup", "http", "the", "00", "of", "san", "essere", "fare",
  "cosa", "sempre", "due", "primo",
  "domenica", "sabato", "venerdi", "giovedi", "mercoledi", "martedi", "lunedi",
  "dopo", "altri", "altre", "seguente", "serata", "dare", "lt", "It", "gia",
  "senza", "prossimo", "prossime", "data", "ora", "settimana", "quali",
  "inoltre", "nuova", "nuovo", "marco", "fatto", "tutta", "tutto", "tutti",
  "zero", "ecc"
)

# Drop every token that matches an entry of the extended list
toks <- tokens_remove(toks, pattern = stopwords_broad)

# Document-feature matrix: one row per event, plus a version aggregated by
# event year.
dfm <- dfm(toks)
dfmy <- dfm_group(dfm, groups="event_year")

# Quick inspection of the matrices
topfeatures(dfm, 50)
ndoc(dfm)
nfeat(dfm)
topfeatures(dfmy, 300)

# Alternative weightings (within-document proportions, tf-idf); not used
# further in the visible part of this script.
dfm_prop <- dfm_weight(dfm, scheme  = "prop")
dfmy_prop <- dfm_group(dfm_prop, groups="event_year")

dfm_uniqueness <- dfm_tfidf(dfm)

# 30 most frequent features per year
freq <- textstat_frequency(dfmy, n = 30, groups = "event_year")

freq <- as.data.frame(freq)

# Highlight colour per feature. The highlighted sets are disjoint, so the order
# of the assignments does not matter.
# Compound tokens created by tokens_compound() are joined with "_", so the
# "cinque stelle" variants must be matched as "cinque_stelle" and
# "movimento_cinque_stelle"; the previous comparison against "cinque stelle"
# (with a space) could never match.
freq$color <- "grey"
freq$color[freq$feature == "referendum"] <- "red"
freq$color[freq$feature %in% c("m5s",
                               "movimento_5_stelle", "5_stelle",
                               "movimento_cinque_stelle", "cinque_stelle")] <- "yellow"
freq$color[freq$feature %in% c("beppe", "grillo", "beppe_grillo")] <- "darkgreen"
freq$color[freq$feature == "elezioni"] <- "blue"

# Grouped features

# Spreadsheet assigning each top feature to a `feature_group`.
# Left join: features missing from the spreadsheet are kept with an NA group.
# (`all.x = TRUE` spelled out: `T` is an ordinary variable that can be
# reassigned, `TRUE` cannot.)
feature_group <- read_excel("./../../data_original/M5S/topfeatures_groups.xls")
freq_g <- merge(freq, feature_group, by = "feature", all.x = TRUE)
head(freq_g)

# "generic" features are treated like ungrouped ones
freq_g$feature_group[freq_g$feature_group=="generic"] <- NA

# Yearly frequency per feature group, ranked within each year.
# summarise() drops the last grouping level, so dense_rank() is computed
# within year; rank 1 is the least frequent group of that year.
freqplot <- freq_g %>%
  dplyr::filter(!is.na(feature_group)) %>%
  dplyr::rename(year = group) %>%
  group_by(year, feature_group) %>%
  dplyr::summarise(frequency_group = sum(frequency)) %>%
  mutate(rank = dense_rank(frequency_group))

freqplot$referendum_marker <- ifelse(freqplot$feature_group=="referendum", "red", "grey")

# One colour label per feature group; groups not listed stay "grey"
freqplot$color <- "grey"
freqplot$color[freqplot$feature_group=="environment"] <- "darkgreen"
freqplot$color[freqplot$feature_group=="m5s"] <- "yellow"
freqplot$color[freqplot$feature_group=="referendum"] <- "darkblue"
freqplot$color[freqplot$feature_group=="populism"] <- "red"
freqplot$color[freqplot$feature_group=="directdemocracy"] <- "orange"

# Variant with "referendum" folded into "directdemocracy"

# feature_group2 equals feature_group except that "referendum" is recoded
freq_g2 <- freq_g
freq_g2$feature_group2 <- replace(
  freq_g2$feature_group,
  which(freq_g2$feature_group == "referendum"),
  "directdemocracy"
)

# Yearly frequency per merged group, ranked within year (rank 1 = least
# frequent). `feature_group` keeps the first original group of each merged
# group. `color` is only a placeholder label: the figure maps it through
# scale_fill_manual(). The "referendum" branch is kept for parity but cannot
# match here, because that group has just been recoded.
freqplot2 <- freq_g2 %>%
  dplyr::filter(!is.na(feature_group)) %>%
  dplyr::rename(year = group) %>%
  group_by(year, feature_group2) %>%
  dplyr::summarise(
    frequency_group = sum(frequency),
    feature_group = first(feature_group)
  ) %>%
  mutate(
    rank = dense_rank(frequency_group),
    color = dplyr::case_when(
      feature_group2 == "directdemocracy" ~ "orange",
      feature_group2 == "populism" ~ "red",
      feature_group2 == "referendum" ~ "darkblue",
      feature_group2 == "m5s" ~ "yellow",
      feature_group2 == "environment" ~ "darkgreen",
      TRUE ~ "grey"
    )
  )



# Figure 3: topic frequency by year, one facet per year, bars ordered by rank.
# Restricted to 2007-2018 for a more compact plot.
# `year` comes from the grouping labels and is not numeric, so it is converted
# explicitly; comparing it directly with 2006/2019 would silently coerce the
# numbers to strings and compare lexicographically.
# NOTE(review): the values passed to scale_fill_manual() are unnamed, so they
# are presumably matched to the sorted levels of `color` rather than to the
# colour names themselves - confirm the intended group-to-colour mapping.
fig3 <- ggplot(
  subset(freqplot2,
         as.numeric(as.character(year)) > 2006 &
           as.numeric(as.character(year)) < 2019),
  aes(y = frequency_group, x = rank)
) +
  geom_bar(aes(fill = color), position = 'dodge', stat = "identity") +
  # group label above each bar, rotated to read vertically
  geom_text(aes(label = feature_group2), nudge_y = 5000, angle = 90) +
  facet_wrap(~year) +
  ylim(0, 20000) +
  ylab("Most Frequent Topics") + xlab("") +
  scale_fill_manual(values = c(DB_greenish, DB_grey, DB_red, DB_bluelight, DB_orange, DB_turqoise)) +
  theme(axis.text.x = element_blank(), legend.position = "none")
print(fig3)
ggsave("./../../results/figures/fig3_topic_by_year.pdf", plot=fig3, height=10, width=17)

# Figure A.1: yearly frequency of the two direct-democracy related groups,
# stacked per year (general direct-democracy features vs. referendum terms).
dd <- freqplot %>%
  dplyr::filter(feature_group %in% c("directdemocracy", "referendum"))

ggplot(dd, aes(x = year, y = frequency_group, fill = feature_group)) +
  geom_bar(stat = "identity") +
  scale_fill_manual(
    labels = c("general: direct democracy features", "specific: referendum | costituzione | costituzionale"),
    values = c(DB_red, DB_blue)
  ) +
  ylab("Topic Frequency") +
  xlab("") +
  theme(
    legend.position = "bottom",
    legend.direction = "vertical",
    legend.title = element_blank()
  )

# ggsave() without `plot =` writes the last plot created above
ggsave("./../../results/figures/figa1_referendum_relevance.pdf", height=5, width=8.5)

# Table: which terms belong to which topic

# One row per distinct feature with its merged group; ungrouped features are
# labelled "generic".
grouping_table <- freq_g2 %>%
  dplyr::select(feature, feature_group = feature_group2) %>%
  dplyr::mutate(feature_group = ifelse(is.na(feature_group), "generic", feature_group)) %>%
  dplyr::filter(!duplicated(feature))

# Collapse to one row per group, terms as a comma-separated string
grouping_table <- aggregate(feature ~ feature_group, data = grouping_table, FUN = toString)

# Total frequency per group over all years.
# NOTE(review): the totals come from `freqplot` (original groups, "referendum"
# separate, no "generic"), while the terms use the merged grouping. The inner
# merge below therefore drops "generic", and the "directdemocracy" total
# excludes the referendum terms listed in its row - confirm this is intended.
total <- freqplot %>%
  group_by(feature_group) %>%
  dplyr::summarise(sum = sum(frequency_group)) %>%
  arrange(sum)

grouping_table <- merge(grouping_table, total, by = "feature_group") %>%
  dplyr::select(feature_group, feature, sum) %>%
  arrange(desc(sum))
names(grouping_table) <- c("topic", "terms", "total nr of terms")

# Export as a LaTeX table without row names
grouping_table <- xtable(grouping_table, digits = 0)
print(grouping_table, include.rownames=FALSE, file="./../../results/tables/taba1_text_grouping.tex")


# Heatmap with counts:

meetupdata <- read.csv("./../../data_original/M5S/0_events_20181206.csv")

# Keep only the columns needed for the map:

keep <- c("group_location", "group_lat", "group_lon", "group_country", "local_date", "yes_rsvp_count")
location_df <- meetupdata[keep]

# Re-coding into year, month etc.:
# `year` is the first four characters of `local_date` and stays a character
# column; `ym` is the year-month of the parsed date.
location_df$year <- substring(location_df$local_date,1,4)
location_df$local_date <- as.Date(location_df$local_date)
location_df$ym <- as.yearmon(location_df$local_date)

# Restrict to Italian groups and events up to 2018, then build an sf point
# object (WGS84) that can be used to check the coordinates on a map.
# dplyr::filter is called explicitly, as everywhere else in this script, so
# that a filter() from another attached package cannot be picked up instead.
# The comparison `year<=2018` is left unchanged: `year` is character, so it is
# compared as a string.
location_df <- location_df %>% dplyr::filter(group_country == "it", year<=2018)
locations_sf <- st_as_sf(location_df, coords = c("group_lon", "group_lat"), crs = 4326)

# Creating several data frames which can then be merged with geo located data for mapping:

## Total number of events and mean RSVP count per location:
n_events <- location_df %>% 
  group_by(group_location, group_lat, group_lon) %>% 
  dplyr::summarize(count = n(), mean_par = mean(yes_rsvp_count)) %>% 
  arrange(desc(count))

## The same, split by year:
n_events_years <- location_df %>% 
  group_by(group_location, group_lat, group_lon, year) %>% 
  dplyr::summarize(count = n(), mean_par = mean(yes_rsvp_count)) %>% 
  arrange(desc(count))


# GGMAPPING:

## ggmap needs a Google Maps API key. The key is read from the environment
## (e.g. a line `GGMAP_GOOGLE_API_KEY=...` in ~/.Renviron) rather than being
## hard-coded: a key committed to the replication files is a leaked credential
## and should be revoked.
google_key <- Sys.getenv("GGMAP_GOOGLE_API_KEY")
if (!nzchar(google_key)) {
  stop("No Google Maps API key found: set the GGMAP_GOOGLE_API_KEY ",
       "environment variable before running the map section.",
       call. = FALSE)
}
register_google(key = google_key)
has_google_key()

## Produce some maps:

### Focus on Italy: the map is centred on c(12.9, 41.3) at zoom level 6.
### (The exploratory geocode("rome") lookup and the unstyled base map were
### removed; their results were never used and each cost an API request.)

### Style of maps: black-and-white, roads and labels hidden, administrative
### features visible.
it_map <- get_googlemap(center = c(12.9, 41.3), zoom = 6,
                        color = "bw",
                        style = "feature:road|visibility:off&style=element:labels|visibility:off&style=feature:administrative|visibility:on")

### Create heatmap (Figure 2a):
### one point per location sized by its number of events, overlaid with a 2D
### density of all individual events (fill and transparency follow the
### density level).
### `guide = "none"` replaces the deprecated `guide = FALSE`.
### NOTE(review): the `..level..` notation is kept for compatibility with
### older ggplot2; newer releases prefer after_stat(level) - confirm against
### the ggplot2 version used for replication.
ggmap(it_map) +
  geom_point(data = n_events,
             aes(x = group_lon,
                 y = group_lat,
                 size = count),
             color = "black", alpha = .6) +
  stat_density2d(data = location_df,
                 aes(x = group_lon, y = group_lat, fill = ..level.., alpha = ..level..),
                 geom = "polygon") +
  scale_fill_gradient(low = "green", high = "red") +
  scale_alpha(range = c(0, .5), guide = "none") +
  theme(axis.ticks = element_blank(),
        axis.text = element_blank(),
        axis.title = element_blank(),
        legend.position = "none")

# ggsave() without `plot =` writes the last plot created above
ggsave("./../../results/figures/fig2a_heatmap.pdf", width=9.7, height=6)

